# -*- coding: utf-8 -*-
# @Date    : 2021/4/21
# @Author  : Maoxian

# Doutub (斗图吧) meme-image site: https://www.doutub.com/

import os
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree

# Root page of the site; passed to the spider as the page to scrape.
base_url = "https://www.doutub.com/"

# Browser-like headers sent with every HTTP request made by this script.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/90.0.4430.72 Safari/537.36 Edg/90.0.818.42"
    ),
}


def pic_down(url, name, save_path='./doutub'):
    """Download one image and store it as ``<save_path>/<name>.<ext>``.

    Args:
        url: Absolute URL of the image to fetch.
        name: Base file name without extension. Characters that cannot
            appear in a file name (path separators, ``?``, ``:`` ...) are
            replaced with ``_``.
        save_path: Directory to write into; created if it does not exist.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        OSError: If the directory or file cannot be written.
    """
    # Derive the extension from the URL *path* only, so a query string or
    # fragment (``pic.jpg?x=1``) never leaks into the file name.
    pic_type = os.path.splitext(urlsplit(url).path)[1].lstrip('.')

    # Without a timeout requests waits indefinitely on a stalled server.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly instead of saving an HTML error page as an "image".
    response.raise_for_status()

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_path, exist_ok=True)

    # The name is scraped page text: neutralise characters that are illegal
    # in file names or that would make the path escape save_path.
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|\r\n\t'})
    safe_name = name.strip().translate(unsafe_chars) or 'unnamed'
    file_name = f'{safe_name}.{pic_type}' if pic_type else safe_name

    with open(os.path.join(save_path, file_name), 'wb') as f:
        f.write(response.content)

    print(f'图片 [{name}] 下载完成.')


def spider_doutub(url):
    """Scrape the image list on a page and download every image found.

    The page is fetched, the caption text and the ``src`` of each entry
    under ``div.new-list.clearfix`` are extracted, and each image is handed
    to ``pic_down`` using its caption as the file name.

    Args:
        url: URL of the page to scrape.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status (from this request or from an image download).
        ValueError: If the number of captions and image URLs differ, which
            means the two lists can no longer be paired reliably.
    """
    # Without a timeout requests waits indefinitely on a stalled server.
    response = requests.get(url, headers=headers, timeout=30)
    # Do not try to parse an error page as if it were the listing.
    response.raise_for_status()
    tree = etree.HTML(response.text)

    # Image captions
    pic_names = tree.xpath('//div[@class="new-list clearfix"]/div/a/span/text()')
    # Image URLs
    pic_urls = tree.xpath('//div[@class="new-list clearfix"]/div/a/img/@src')

    if len(pic_names) != len(pic_urls):
        raise ValueError('url和文字对应不上，请检查')

    for pic_name, pic_url in zip(pic_names, pic_urls):
        # urljoin leaves absolute URLs untouched and resolves relative or
        # protocol-relative ``src`` values against the page URL.
        pic_down(urljoin(url, pic_url), pic_name)


# Script entry point: scrape the page at base_url and download its images.
# NOTE(review): this call sits at module top level, so it also runs whenever
# the file is imported -- consider an `if __name__ == "__main__":` guard.
spider_doutub(base_url)
